Data Importing and Cleaning

# Request up to 500 park records from the NPS parks endpoint.
# NOTE(review): the API key is embedded in the URL below and is therefore
# committed with the source; consider rotating it and loading it from an
# environment variable (e.g. via Sys.getenv()) instead.
res <- GET('https://developer.nps.gov/api/v1/parks?limit=500&api_key=B9nDpbkbrb3kSOjz6kXSxMJ3d6MSpUvt1QqYdeyn')

# Fail right here on an HTTP error (bad key, rate limit, outage) rather than
# letting the JSON parsing / unnesting steps break later with an unrelated
# message.
httr::stop_for_status(res)

# Read the response body as text, parse the JSON, and keep the result as a
# tibble for the unnesting step that follows.
data <- as_tibble(jsonlite::fromJSON(content(res, "text")))

# Flatten the API payload, keep only the fields used in the analysis, and
# expand the nested activities and topics tables into rows.
NPS_data <-
  data %>%
  unnest(data) %>%
  select(
    fullName, latitude, longitude,
    topics, activities, states, parkCode
  ) %>%
  janitor::clean_names() %>%
  # Coordinates are coerced to numeric so they can be plotted.
  mutate(across(c(latitude, longitude), as.numeric)) %>%
  # Expanded columns are prefixed with `activities_` / `topics_`.
  unnest(activities, names_sep = "_") %>%
  unnest(topics, names_sep = "_")
# Public-use statistics export (one row per park / year / month, judging by
# the columns kept), renamed so its key columns line up with NPS_data.
visitation_data <-
  read_csv("data/Query Builder for Public Use Statistics (1979 - Last Calendar Year).csv") %>%
  janitor::clean_names() %>%
  rename(
    full_name = park_name,
    park_code = unit_code
  ) %>%
  # Lower-cased, presumably to match the API's park codes -- verify.
  mutate(park_code = tolower(park_code)) %>%
  select(
    full_name, park_code, park_type, region, state,
    year, month,
    recreation_visits, tent_campers, rv_campers, backcountry
  )

# Join park metadata to the visitation records on the shared park code,
# keeping parks that appear in only one of the two tables.
# NOTE(review): both inputs appear to hold many rows per park_code (one per
# activity/topic pair and one per year/month), so this join multiplies rows;
# confirm that downstream means are meant to be computed on the expanded table.
combined_data <-
  NPS_data %>%
  full_join(visitation_data, by = "park_code")

Regional Comparisons

# Visitation-only table with `region` recomputed from the two-letter state
# code; any state not listed below is labelled "no state data".
region_data_vists <-
  visitation_data %>%
  mutate(
    region = case_when(
      state %in% c(
        "CT", "MA", "MD", "ME", "NH", "NJ", "NY", "PA", "RI", "VT"
      ) ~ "northeast",
      state %in% c(
        "IA", "IL", "IN", "KS", "MI", "MN",
        "MO", "ND", "NE", "OH", "SD", "WI"
      ) ~ "midwest",
      state %in% c(
        "AL", "AR", "DC", "DE", "FL", "GA", "KY", "LA",
        "MS", "NC", "OK", "SC", "TN", "TX", "VA", "WV"
      ) ~ "south",
      state %in% c(
        "AK", "AZ", "CA", "CO", "HI", "ID", "MT",
        "NM", "NV", "OR", "UT", "WA", "WY"
      ) ~ "west",
      state %in% c("AS", "GU", "PR", "VI") ~ "u.s. territory",
      TRUE ~ "no state data"
    )
  )
# Assign every joined row to a region based on the visitation table's
# two-letter `state` code, then drop helper columns not used downstream.
region_data <- combined_data %>%
  mutate(
    region = case_when(
      state %in% c(
        "CT", "RI", "NH", "VT", "NJ", "NY", "PA", "MD", "ME", "MA"
      ) ~ "northeast",
      state %in% c(
        "IL", "IN", "MI", "OH", "WI", "IA",
        "KS", "MN", "MO", "NE", "ND", "SD"
      ) ~ "midwest",
      state %in% c(
        "FL", "GA", "NC", "SC", "VA", "DE", "WV", "AL",
        "KY", "MS", "TN", "AR", "LA", "OK", "TX", "DC"
      ) ~ "south",
      state %in% c(
        "AK", "CA", "HI", "OR", "WA", "AZ", "CO",
        "ID", "MT", "NV", "NM", "UT", "WY"
      ) ~ "west",
      state %in% c("VI", "AS", "GU", "PR") ~ "u.s. territory",
      # Catch-all branch. The previous condition `state == TRUE` compared a
      # state code with a logical, which is never TRUE (and is NA for a
      # missing state), so unmatched rows were silently left as NA instead of
      # receiving this label.
      TRUE ~ "no state data"
    )
  ) %>%
  select(-c(full_name.y, topics_id, topics_name, activities_id))

Northeast

# Preview of the first six distinct activity names among northeastern rows.
region_data %>%
  filter(region == "northeast") %>%
  distinct(activities_name) %>%
  head(n = 6) %>%
  knitr::kable()
activities_name
Arts and Culture
Cultural Demonstrations
Astronomy
Stargazing
Biking
Boating
# Bar chart of mean tent, backcountry and RV camper counts per season for the
# Northeast, with one facet per camping type.
northeast_plot <-
  region_data %>%
  filter(region == "northeast") %>%
  mutate(
    # Any month not listed in the three groups below is labelled "Fall".
    season = case_when(
      month %in% c(6, 7, 8) ~ "Summer",
      month %in% c(3, 4, 5) ~ "Spring",
      month %in% c(12, 1, 2) ~ "Winter",
      TRUE ~ "Fall"
    )
  ) %>%
  group_by(season) %>%
  summarise(
    mean_tent = mean(tent_campers, na.rm = TRUE),
    mean_backcountry = mean(backcountry, na.rm = TRUE),
    mean_rv = mean(rv_campers, na.rm = TRUE)
  ) %>%
  # Long format: one row per season x camping type, with the `mean_` prefix
  # stripped so the facet labels read "tent", "backcountry", "rv".
  pivot_longer(
    cols = starts_with("mean_"),
    names_prefix = "mean_",
    names_to = "type_visit",
    values_to = "mean"
  ) %>%
  ggplot(aes(x = season, y = mean)) +
  geom_col() +
  facet_grid(. ~ type_visit) +
  labs(
    title = "Visitation in Northeast by Season",
    x = "Season",
    y = "Average Visitation"
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

northeast_plot

Midwest

# Preview of the first six distinct activity names among midwestern rows.
region_data %>%
  filter(region == "midwest") %>%
  distinct(activities_name) %>%
  head(n = 6) %>%
  knitr::kable()
activities_name
Arts and Culture
Cultural Demonstrations
Astronomy
Stargazing
Food
Picnicking
# Bar chart of mean tent, backcountry and RV camper counts per season for the
# Midwest, with one facet per camping type.
midwest_plot <-
  region_data %>%
  filter(region == "midwest") %>%
  mutate(
    # Any month not listed in the three groups below is labelled "Fall".
    season = case_when(
      month %in% c(6, 7, 8) ~ "Summer",
      month %in% c(3, 4, 5) ~ "Spring",
      month %in% c(12, 1, 2) ~ "Winter",
      TRUE ~ "Fall"
    )
  ) %>%
  group_by(season) %>%
  summarise(
    mean_tent = mean(tent_campers, na.rm = TRUE),
    mean_backcountry = mean(backcountry, na.rm = TRUE),
    mean_rv = mean(rv_campers, na.rm = TRUE)
  ) %>%
  # Long format: one row per season x camping type, `mean_` prefix removed.
  pivot_longer(
    cols = starts_with("mean_"),
    names_prefix = "mean_",
    names_to = "type_visit",
    values_to = "mean"
  ) %>%
  ggplot(aes(x = season, y = mean)) +
  geom_col() +
  facet_grid(. ~ type_visit) +
  labs(
    title = "Visitation in Midwest by Season",
    x = "Season",
    y = "Average Visitation"
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

midwest_plot

South

# Preview of the first six distinct activity names among southern rows.
south_activities_preview <- NULL
region_data %>%
  filter(region == "south") %>%
  distinct(activities_name) %>%
  head(n = 6) %>%
  knitr::kable()
activities_name
Astronomy
Stargazing
Food
Picnicking
Guided Tours
Self-Guided Tours - Walking
# Bar chart of mean tent, backcountry and RV camper counts per season for the
# South, with one facet per camping type.
south_plot <-
  region_data %>%
  filter(region == "south") %>%
  mutate(
    # Any month not listed in the three groups below is labelled "Fall".
    season = case_when(
      month %in% c(12, 1, 2) ~ "Winter",
      month %in% c(3, 4, 5) ~ "Spring",
      month %in% c(6, 7, 8) ~ "Summer",
      TRUE ~ "Fall"
    )
  ) %>%
  group_by(season) %>%
  summarize(
    mean_tent = mean(tent_campers, na.rm = TRUE),
    mean_backcountry = mean(backcountry, na.rm = TRUE),
    mean_rv = mean(rv_campers, na.rm = TRUE)
  ) %>%
  # Long format: one row per season x camping type, `mean_` prefix removed.
  pivot_longer(
    cols = starts_with("mean_"),
    names_to = "type_visit",
    values_to = "mean",
    names_prefix = "mean_"
  ) %>%
  ggplot(aes(x = season, y = mean)) +
  geom_col() +
  facet_grid(~type_visit) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  # Title now reads "Visitation in South ..." to match the wording of the
  # other regional plots (the "in" was missing).
  labs(
    title = "Visitation in South by Season",
    x = "Season",
    y = "Average Visitation"
  )

south_plot

West

# Preview of the first six distinct activity names among western rows.
region_data %>%
  filter(region == "west") %>%
  distinct(activities_name) %>%
  head(n = 6) %>%
  knitr::kable()
activities_name
Arts and Culture
Astronomy
Stargazing
Biking
Camping
Backcountry Camping
# Bar chart of mean tent, backcountry and RV camper counts per season for the
# West, with one facet per camping type. The plot is stored, not printed.
west_plot <-
  region_data %>%
  filter(region == "west") %>%
  mutate(
    # Any month not listed in the three groups below is labelled "Fall".
    season = case_when(
      month %in% c(6, 7, 8) ~ "Summer",
      month %in% c(3, 4, 5) ~ "Spring",
      month %in% c(12, 1, 2) ~ "Winter",
      TRUE ~ "Fall"
    )
  ) %>%
  group_by(season) %>%
  summarise(
    mean_tent = mean(tent_campers, na.rm = TRUE),
    mean_backcountry = mean(backcountry, na.rm = TRUE),
    mean_rv = mean(rv_campers, na.rm = TRUE)
  ) %>%
  # Long format: one row per season x camping type, `mean_` prefix removed.
  pivot_longer(
    cols = starts_with("mean_"),
    names_prefix = "mean_",
    names_to = "type_visit",
    values_to = "mean"
  ) %>%
  ggplot(aes(x = season, y = mean)) +
  geom_col() +
  facet_grid(. ~ type_visit) +
  labs(
    title = "Visitation in West by Season",
    x = "Season",
    y = "Average Visitation"
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Show the four regional plots together in a single figure.
# NOTE(review): `+` between complete ggplot objects presumably relies on a
# plot-composition package such as patchwork being attached -- confirm.
northeast_plot + midwest_plot + south_plot + west_plot

Across all regions, summer appears to have the highest visitation, with the exception of backcountry hiking, which peaks in spring.

For the Northeastern U.S., tent camping is by far the most popular visitation type. Interestingly, in this region of the United States, there was hardly any visitation in winter for any of the three visitation types. This might be because the Northeastern U.S. typically has colder temperatures and possibly snow during this time.

For the Midwestern U.S., tent camping is the most popular visitation type, especially in the summer, but it is closely followed by backcountry hiking in summer. RV camping does seem to be higher in the Midwest than in the northeastern region, especially during the summer. As before, there is hardly any visitation in the winter, and actually none for RV camping. This again might be due to weather conditions not permitting outdoor activities such as these. For the Southern U.S., tent camping has the highest average visitation in the summer time, closely followed by RV camping. In the south, RV camping is much more popular than in other regions around the U.S. Backcountry is lower, but does have an almost even spread among the seasons. This could be due to the more temperate conditions in the winter and other times of the year, allowing individuals to enjoy this activity year round.

For the Western U.S., tent camping once again has the highest average visitation across all seasons. Interestingly, backcountry hiking and RV camping are equal in the summer time in this region. This region has some visitation in the winter, but less than the southern region.

Comparing Regions

# Mean of `recreation_visits` over all rows in each region, largest first.
region_data %>%
  group_by(region) %>%
  summarise(avg_visitation = mean(recreation_visits, na.rm = TRUE)) %>%
  arrange(desc(avg_visitation)) %>%
  knitr::kable()
region avg_visitation
west 149106.067
south 95613.721
northeast 83202.371
midwest 44547.512
u.s. territory 31867.809
NA 4264.793
# Long-format copy of region_data: the four visit-count columns are stacked
# into a `visit_type` / `count` pair.
region_long <-
  region_data %>%
  pivot_longer(
    cols = all_of(
      c("recreation_visits", "tent_campers", "rv_campers", "backcountry")
    ),
    names_to = "visit_type",
    values_to = "count"
  )

The western region of the U.S. seems to have the highest average visitation of all regions, followed by the southern region.

Looking at the number of parks in each region will help determine whether this is because there are more parks in this region, or whether something else is going on.

# Number of distinct park codes observed in each region.
region_data %>%
  group_by(region) %>%
  summarise(parks = n_distinct(park_code)) %>%
  knitr::kable()
region parks
midwest 46
northeast 68
south 117
u.s. territory 7
west 116
NA 122

From this table, we can see that the southern region actually has the most parks in this dataset, followed by the western region. So this trend in visitation is not just due to the number of parks in a specific region.

# Mean tent, backcountry and RV camper counts per region, drawn as bars with
# one facet per camping type.
region_data %>%
  group_by(region) %>%
  summarise(
    mean_tent = mean(tent_campers, na.rm = TRUE),
    mean_backcountry = mean(backcountry, na.rm = TRUE),
    mean_rv = mean(rv_campers, na.rm = TRUE)
  ) %>%
  # Long format: one row per region x camping type, `mean_` prefix removed.
  pivot_longer(
    cols = starts_with("mean_"),
    names_prefix = "mean_",
    names_to = "type_visit",
    values_to = "mean"
  ) %>%
  ggplot(aes(x = region, y = mean)) +
  geom_col() +
  facet_grid(. ~ type_visit) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Now looking at the specific visits by region, we can see once again that the west has the highest mean visitation among all the regions, with its highest being tent camping. Unsurprisingly, we see that the south has the second highest mean visitation. Interestingly, in backcountry hiking, the midwest has the second highest mean visitation, despite the fact that it has one of the lowest numbers of parks in the dataset.

US National Parks by Region

# Park metadata with a `region` label derived from the API's `states` field,
# used for the map below.
# NOTE(review): `states` is matched as a single two-letter code; if the API
# returns several states in one value for multi-state parks, those rows fall
# through to "no state data" -- confirm.
regional_data <-
  NPS_data %>%
  mutate(
    region = case_when(
      states %in% c(
        "CT", "MA", "MD", "ME", "NH", "NJ", "NY", "PA", "RI", "VT"
      ) ~ "northeast",
      states %in% c(
        "IA", "IL", "IN", "KS", "MI", "MN",
        "MO", "ND", "NE", "OH", "SD", "WI"
      ) ~ "midwest",
      states %in% c(
        "AL", "AR", "DC", "DE", "FL", "GA", "KY", "LA",
        "MS", "NC", "OK", "SC", "TN", "TX", "VA", "WV"
      ) ~ "south",
      states %in% c(
        "AK", "AZ", "CA", "CO", "HI", "ID", "MT",
        "NM", "NV", "OR", "UT", "WA", "WY"
      ) ~ "west",
      states %in% c("AS", "GU", "PR", "VI") ~ "u.s. territory",
      TRUE ~ "no state data"
    )
  )

# Geo layout options handed to plotly's layout() for the park map:
# a USA-scoped Albers projection with land drawn in a light fill colour.
g <- list(
  scope = "usa",
  projection = list(
    type = "albers usa"
  ),
  showland = TRUE,
  landcolor = toRGB("#e5ecf6"),
  countrywidth = 0.5,
  subunitwidth = 0.5
)

# Interactive map: one circle marker per row of regional_data, coloured by
# region, with the park name and state(s) shown on hover.
fig <-
  regional_data %>%
  plot_geo(lat = ~latitude, lon = ~longitude) %>%
  add_markers(
    text = ~paste(full_name, states, sep = "<br />"),
    color = ~region,
    symbol = I("circle"),
    size = I(8),
    hoverinfo = "text"
  ) %>%
  layout(title = "US National Parks", geo = g)

fig

Finally, we wanted a way to visualize the national parks on a U.S. map.